#!/usr/bin/python
# Copyright (c) 2011 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.


import optparse
import os
import re
import sys
import time
import urllib2


__version__ = '1.0'
BUILDER_URL = 'http://build.chromium.org/p/chromium.perf/builders/%s'
BUILD_URL = 'http://build.chromium.org/p/chromium.perf/builders/%s/builds/%s'
STEP_STDIO_URL = ('http://build.chromium.org/p/chromium.perf/builders/%s/'
                  'builds/%s/steps/%s/logs/stdio')
USAGE = ''

# The revision range where the issue occurred.
LOWER_REV = 69491
UPPER_REV = 70280

# These were the active bots running the page cycler at that time.
BUILDERS = {
  'Linux%20Perf%20(1)': {'lower': 2035, 'upper': 2389},
  'Linux%20Perf%20(lowmem)': {'lower': 165, 'upper': 317},
  'Mac10.5%20Perf(1)': {'lower': 1018, 'upper': 1209},
  'Mac10.6%20Perf(1)': {'lower': 1509, 'upper': 1800},
  'Vista%20Perf%20(1)': {'lower': 2135, 'upper': 2471},
  'Vista%20Perf%20(dbg)': {'lower': 1438, 'upper': 1738},
  'Vista%20Perf%20(jank)': {'lower': 249, 'upper': 417},
  'XP%20Perf%20(1)': {'lower': 2035, 'upper': 2361},
  'XP%20Perf%20(dbg)': {'lower': 1081, 'upper': 1287},
  'XP%20Perf%20(jank)': {'lower': 406, 'upper': 552},
}

# These were the active page cycler steps at that time.
STEPS = [
  'page_cycler_moz',
  'page_cycler_morejs',
  'page_cycler_intl1',
  'page_cycler_intl2',
  'page_cycler_dhtml',
  'page_cycler_database',
  'page_cycler_indexeddb',
  'page_cycler_moz-http',
  'page_cycler_bloat-http',
]


def GetBuilderUrl(builder):
  return BUILDER_URL % builder


def GetBuildUrl(builder, buildnumber):
  return BUILD_URL % (builder, buildnumber)


def GetStepStdioUrl(builder, buildnumber, test):
  return STEP_STDIO_URL % (builder, buildnumber, test)


# Floating point representation of last time we fetched a URL.
last_fetched_at = None
def FetchUrlContents(url):
  global last_fetched_at
  if last_fetched_at and ((time.time() - last_fetched_at) <= 0.1):
    # Sleep for a tenth of a second to avoid overloading the server.
    time.sleep(0.1)
  try:
    last_fetched_at = time.time()
    connection = urllib2.urlopen(url)
  except urllib2.HTTPError, e:
    if e.code == 404:
      return None
    raise e
  text = connection.read().strip()
  connection.close()
  return text


def GetLatestBuildFromUrl(builder):
  text = FetchUrlContents(GetBuilderUrl(builder))
  if not text:
    # Master doesn't know about this builder.
    raise Exception('no such builder %s' % builder)
  matching = False
  for line in text.splitlines():
    # First wait until we get to "Recent Builds".
    match_data = re.match(r'.*Recent Builds.*', line)
    if match_data:
      matching = True

    # If we've made it to "Recent Builds", then look for the first build line.
    if matching:
      match_data = re.match(r'.*/builds/(\d+)".*', line)
      if match_data:
        return int(match_data.group(1))
  # Malformed output.
  raise Exception('master returned output missing latest build number')


latestdata = {}
def GetLatestBuild(builder):
  global latestdata
  if builder not in latestdata:
    latestdata[builder] = GetLatestBuildFromUrl(builder)
  return latestdata[builder]


def GetHighestRevisionForBuildFromUrl(builder, build):
  url = GetBuildUrl(builder, build)
  text = FetchUrlContents(url)
  if not text:
    # Master doesn't know about this build.
    return None
  # Match both 'Got Revision: ' and 'Revision: '.
  match_data = re.match(r'.*Revision: (\d+).*', text, re.DOTALL)
  if not match_data:
    # Malformed output.
    raise Exception('master returned output missing revision number, '
                    'url = %s' % url)
  return int(match_data.group(1))


builderdata = {}
def GetHighestRevisionForBuild(builder, build):
  global builderdata
  if builder not in builderdata:
    builderdata[builder] = {}
  if build not in builderdata[builder]:
    builderdata[builder][build] = GetHighestRevisionForBuildFromUrl(builder,
                                                                    build)
  return builderdata[builder][build]


def GetBuildRange(builder, build):
  thisbuild_revmax = GetHighestRevisionForBuild(builder, build)
  prevbuild_revmax = GetHighestRevisionForBuild(builder, build-1)
  if prevbuild_revmax and thisbuild_revmax == prevbuild_revmax:
    # If this build and the previous build are for the same one revision, let
    # this build's min revision be the same as its max.
    thisbuild_revmin = prevbuild_revmax
  elif prevbuild_revmax:
    # If this build is newer than the previous build, let this build's min
    # revision be one revision newer than the previous build's max revision.
    thisbuild_revmin = prevbuild_revmax + 1
  else:
    # No data was available for the previous build, so we don't know this
    # build's minimum revision.
    thisbuild_revmin = None
  if thisbuild_revmax < thisbuild_revmin:
    raise Exception('ERROR: builder %s build %s revmax is less than revmin' % (
        builder, build))
  return (thisbuild_revmin, thisbuild_revmax)


FIND_TOP_REV = [
  ['TopRevInBuild', 'found'],
  ['BottomRevInBuild', 'buildmin'],
  ['BuildBetweenRevs', 'buildmin'],
  ['BuildBelowBottomRev', 'buildmin'],
  ['BuildAboveTopRev', 'buildmax'],
]


FIND_BOTTOM_REV = [
  ['BottomRevInBuild', 'found'],
  ['TopRevInBuild', 'buildmax'],
  ['BuildBetweenRevs', 'buildmax'],
  ['BuildBelowBottomRev', 'buildmin'],
  ['BuildAboveTopRev', 'buildmax'],
]


def BisectBuildRange(builder, routines, lowerbuild, upperbuild):
  if upperbuild < lowerbuild:
    raise Exception('ERROR: builder %s upperbuild %s cannot be less than '
                    'lowerbuild %s' % (builder, upperbuild, lowerbuild))

  buildmin = lowerbuild
  buildmax = upperbuild
  buildguess = buildmax
  iteration = 0
  while ((buildmax - buildmin) > 0):
    iteration += 1
    (revmin, revmax) = GetBuildRange(builder, buildguess)
    bottom_rev_in_build = (revmin <= LOWER_REV <= revmax)
    top_rev_in_build = (revmin <= UPPER_REV <= revmax)
    build_between_revs = (LOWER_REV < revmin <= revmax < UPPER_REV)
    build_below_bottom_rev = (revmin < LOWER_REV)
    build_above_top_rev = (UPPER_REV < revmax)
    for routine in routines:
      if (routine[0] == 'BottomRevInBuild' and bottom_rev_in_build):
        if routine[1] == 'found': return buildguess
        elif routine[1] == 'buildmin': buildmin = buildguess
        elif routine[1] == 'buildmax': buildmax = buildguess
      elif (routine[0] == 'TopRevInBuild' and top_rev_in_build):
        if routine[1] == 'found': return buildguess
        elif routine[1] == 'buildmin': buildmin = buildguess
        elif routine[1] == 'buildmax': buildmax = buildguess
      elif (routine[0] == 'BuildBetweenRevs' and build_between_revs):
        if routine[1] == 'buildmin': buildmin = buildguess
        elif routine[1] == 'buildmax': buildmax = buildguess
      elif (routine[0] == 'BuildBelowBottomRev' and build_below_bottom_rev):
        if routine[1] == 'buildmin': buildmin = buildguess
        elif routine[1] == 'buildmax': buildmax = buildguess
      elif (routine[0] == 'BuildAboveTopRev' and build_above_top_rev):
        if routine[1] == 'buildmin': buildmin = buildguess
        elif routine[1] == 'buildmax': buildmax = buildguess
    buildguess = buildmin + int((buildmax - buildmin) / 2)

    if iteration > 20:
      raise Exception('ERROR: failed to bisect within 20 attempts')
  return None


def SaveStdio(builder, build, step, stdio):
  build = str(build)
  buildpath = os.path.join(builder, build)
  steppath = os.path.join(builder, build, step)
  if not os.path.exists(builder):
    os.mkdir(builder)
  if not os.path.exists(buildpath):
    os.mkdir(buildpath)
  stepfile = open(steppath, 'w')
  stepfile.write(stdio)
  stepfile.close()



def Main(args):
  parser = optparse.OptionParser(usage=USAGE, version=__version__)
  parser.add_option('-s', '--build-range', action='store_true',
                    dest='build_range', default=False,
                    help='get builder build ranges')
  parser.add_option('-d', '--download', action='store_true',
                    dest='download', default=False,
                    help='download page cycler data')
  parser.add_option('-q', '--quick-mode', action='store_true',
                    dest='quick_mode', default=False,
                    help='only download oldest available output')
  options, args = parser.parse_args(args)

  # Change to this script's directory.
  os.chdir(os.path.dirname(os.path.abspath(__file__)))

  if options.build_range:
    builder_keys = BUILDERS.keys()
    builder_keys.sort()
    print 'BUILDERS = {'
    for builder in builder_keys:
      if BUILDERS[builder]['lower'] and BUILDERS[builder]['upper']:
        print '  \'%s\': {\'lower\': %d, \'upper\': %d},' % (
            builder, BUILDERS[builder]['lower'], BUILDERS[builder]['upper'])
        continue
      latest_build = GetLatestBuild(builder)
      if not BUILDERS[builder]['lower']:
        BUILDERS[builder]['lower'] = BisectBuildRange(
            builder, FIND_BOTTOM_REV, 1, latest_build)
      if not BUILDERS[builder]['upper']:
        BUILDERS[builder]['upper'] = BisectBuildRange(
            builder, FIND_TOP_REV, 1, latest_build)
      print '  \'%s\': {\'lower\': %d, \'upper\': %d},' % (
          builder, BUILDERS[builder]['lower'], BUILDERS[builder]['upper'])
    print '}'

  elif options.download:
    builder_keys = BUILDERS.keys()
    builder_keys.sort()
    for builder in builder_keys:
      if not BUILDERS[builder]['lower'] or not BUILDERS[builder]['upper']:
        print 'skipping builder %s' % builder
        continue
      build_stdio_present = 0
      for build in range(BUILDERS[builder]['lower'],
                         BUILDERS[builder]['upper']):
        stdio_retrieved = False
        stdio_present = False

        for step in STEPS:
          # Skip files that already exist.
          if os.path.exists(os.path.join(builder, str(build), step)):
            continue
          stdio = FetchUrlContents(GetStepStdioUrl(builder, build, step))
          stdio_retrieved = True
          if stdio:
            stdio_present = True
            SaveStdio(builder, build, step, stdio)
          else:
            SaveStdio(builder, build, step, 'missing')
            if options.quick_mode:
              # Break out of downloading the rest of the steps for now since
              # we assume they're also missing.
              break

        if stdio_retrieved:
          print '%s/%s' % (builder, build)
        if stdio_present:
          build_stdio_present += 1

        # Now since we want to prefer finding the oldest data, break to the next
        # builder.
        if options.quick_mode and (build_stdio_present > 10):
          break


if __name__ == '__main__':
  sys.exit(Main(sys.argv))
